* ===================<Chapters> ===================
*1. Preparation
*2. Data Preparation and Randomization
*3. Merging Datasets
*4. Conducting a Balancing Test
* =================================================


* =========================
* =========================
* 1. Preparation 
* =========================
* =========================


clear
clear matrix

tempfile resi_pop h_pop base_data households h_households some_indicators livestock base_dataset


* =========================
* =========================
* 2. Data Preparation and Randomization
* =========================
* =========================


* =========================
* Prepare dataset(resident_population)
* =========================

* import rawdata
import excel using rawdata\NSO\resipop.xlsx, clear firstrow

* rename
rename BAGBAG Bag_name
rename Y20152015 population

* modify code and name of 1840751 in Bag_name
replace Bag_name = "18407 Bulgan (modified)" if Bag_name == "    1840751  "
replace Bag_name = "1840751 (modified)" if Bag_name == "   18407 Bulgan"

* modify code of 3441659, 3441659 in Bag_name
replace Bag_name = "3441657 (modified)" if Bag_name == "    441657 4-r bag, Suld"
replace Bag_name = "3441659 (modified)" if Bag_name == "    441659 5-r bag, Borkhoi"

* data check
gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
egen total_pop = max(population)
drop if Bag_ID==.
egen sum_pop = total(population)
drop if Bag_ID>5000000
gen check_flag = total_pop - sum_pop
**(if check_flag is not equal to 0, you have to check Bag_ID)



* sampling of target Aimags
gen target=0
replace target=1  if Aimag_ID == 421 | Aimag_ID == 341 | Aimag_ID == 343 | Aimag_ID == 346 | Aimag_ID == 264 | Aimag_ID == 265 | Aimag_ID == 181 | Aimag_ID == 185
label variable target "Target area"
label define target  1 "(1) Targetedd area" 0 "(2) Non-targeted area", replace
label values target target

gen target_test = 2-target 
label define target_test  1 "(1) Targetedd area" 2 "(2) Non-targeted area", replace
label values target_test target_test

save `resi_pop', replace


* =========================
* Prepare dataset(h_pop)
* =========================

* import rawdata
import excel using rawdata\NSO\h_pop.xlsx, clear firstrow

* rename
rename BAG Bag_name
rename Y20152015 h_pop

gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
egen total_hp = max(h_pop)
drop if Bag_ID==.
egen sum_hp = total(h_pop)
drop if Bag_ID>5000000
gen check_flag = total_hp - sum_hp
*(if check_flag is not equal to 0, you have to check Bag_ID)
save `h_pop', replace


* =========================
* Merge dataset(h_pop) with dataset(resident_population)
* =========================

* merge 
clear
use `resi_pop'
merge m:1 Bag_ID using `h_pop'
replace h_pop=0 if h_pop==.
drop _merge
drop if population==.


* gen strata
gen h_rate = h_pop/ population
sort Aimag_ID, stable
by Aimag_ID: egen med_h_rate = median(h_rate)
gen d_h_rate=0
replace d_h_rate=1 if h_rate - med_h_rate>0


* =========================
* Randomization with strata
* =========================

* strata Aimag and h_rate
sort Bag_ID
randtreat Aimag_ID d_h_rate, mult(4) misfits(strata) setseed(1) sortpreserve replace

tabstat population if target==1, by(treatment) stat(count mean median sd min max sum) 
tabstat h_pop if target==1, by(treatment) stat(count mean median sd min max sum) 
xi: reg population i.treatment if target==1, robust
xi: reg h_pop i.treatment if target==1, robust


save `base_data', replace



* =========================
* =========================
* 3. Merging Datasets
* =========================
* =========================

* =========================
* Prepare dataset(households)
* =========================

* import rawdata
import excel using rawdata\NSO\households.xlsx, clear firstrow

* rename
rename BAGBAG Bag_name
rename Y20152015 households

* modify code and name of 1840751 in Bag_name
replace Bag_name = "18407 Bulgan (modified)" if Bag_name == "    1840751  "
replace Bag_name = "1840751 (modified)" if Bag_name == "   18407 Bulgan"

* modify code of 3441659, 3441659 in Bag_name
replace Bag_name = "3441657 (modified)" if Bag_name == "    441657 4-r bag, Suld"
replace Bag_name = "3441659 (modified)" if Bag_name == "    441659 5-r bag, Borkhoi"

* data check
gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
egen total_hh = max(households)
drop if Bag_ID==.
egen sum_hh = total(households)
drop if Bag_ID>5000000
gen check_flag = total_hh - sum_hh
*(if check_flag is not equal to 0, you have to check Bag_ID)
save `households', replace


* =========================
* Prepare dataset(herder_households)
* =========================

* import rawdata
import excel using rawdata\NSO\h_households.xlsx, clear firstrow

* rename
rename BAG Bag_name
rename Y20152015 h_households

gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
egen total_hh = max(h_households)
drop if Bag_ID==.
egen sum_hh = total(h_households)
drop if Bag_ID>5000000
gen check_flag = total_hh - sum_hh
*(if check_flag is not equal to 0, you have to check Bag_ID)
save `h_households', replace


* =========================
* Prepare dataset(some indicators)
* =========================

* import rawdata
import excel using rawdata\NSO\some_indicators.xlsx, sheet("Data") cellrange(A2:M1935) firstrow case(lower) clear

* rename
rename bag Bag_name

* data check
gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
destring sourceofelectricity-handmadefelt, replace ignore(-)


egen total_mp = max(mobilephones)
drop if Bag_ID==.
egen sum_mp = total(mobilephones)
drop if Bag_ID>5000000
gen check_flag = total_mp - sum_mp
*(if check_flag is not equal to 0, you have to check Bag_ID)
save `some_indicators' , replace


* =========================
* Prepare dataset(livestock)
* =========================

import excel using rawdata\NSO\livestock.xlsx, sheet("Data") cellrange(E3:K2042) firstrow case(lower) clear

* rename
rename bag5 Bag_name
rename subsummary total_livestock

gen Bag_ID = regexs(0)  if(regexm(Bag_name, "[0-9][0-9][0-9][0-9][0-9][0-9][0-9]"))
tostring Bag_ID, replace 
gen Soum_ID = substr(Bag_ID,1,5)
gen Aimag_ID = substr(Bag_ID,1,3)
gen s_Bag_ID = substr(Bag_ID,2,6)
destring Bag_ID Soum_ID Aimag_ID s_Bag_ID, replace ignore(-)
egen total_tl = max(total_livestock)
drop if Bag_ID==.
egen sum_tl = total(total_livestock)
drop if Bag_ID>5000000
gen check_flag = total_tl - sum_tl
*(if check_flag is not equal to 0, you have to check Bag_ID)
save `livestock', replace


* =========================
* Merge using other dataset
* =========================

* merge 
use `base_data', clear
merge m:1 Bag_ID using `h_households'
drop _merge
merge m:1 Bag_ID using `households'
drop _merge
merge m:1 Bag_ID using `some_indicators'
drop _merge
merge m:1 Bag_ID using `livestock'
drop _merge


drop if population==.

gen hh_rate = h_households*1000/ households
sort Aimag_ID, stable
by Aimag_ID: egen med_hh_rate = median(hh_rate)
gen d_hh_rate=0
replace d_hh_rate=1 if hh_rate - med_hh_rate>0


egen pop_treat = sum(population) if target==1, by(treatment) 
egen h_pop_treat = sum(h_pop) if target==1, by(treatment) 

egen pop_target = sum(population), by(target) 
egen h_pop_target = sum(h_pop), by(target) 

label variable pop_treat "Total population"
label variable h_pop_treat "Total herder population"

label variable pop_target "Total population"
label variable h_pop_target "Total herder population"

sort Bag_ID


* generating ratio variables
foreach x in total_livestock horse cattle camel sheep goat television radio mobilephones sourceofelectricity {
gen `x'_h_rate = `x'/h_households*0.001
}

save `base_dataset.dta', replace


* =========================
* distribution numbers in each bag
* =========================

* import rawdata
import excel using rawdata\NSO\admin_code.xlsx, clear firstrow


rename АймагНийслэлийнКод NSO_aimag_code
rename АймагНийслэлМонгол NSO_aimag_name
rename АймагНийслэлАнгли NSO_aimag_name_en
rename СумДүүргийнКод NSO_soum_code
rename СумДүүрэгМонгол NSO_soum_name
rename СумДүүрэгАнгли NSO_soum_name_en
rename БагХорооныКод NSO_bag_code
rename БагХорооМонгол NSO_bag_name
rename БагХорооАнгли NSO_bag_name_en

destring NSO_soum_code, replace ignore(-)

gen s_Soum_ID= NSO_aimag_code *100 + NSO_soum_code
gen s_Bag_ID= NSO_aimag_code *10000 + NSO_soum_code *100 + NSO_bag_code
drop if NSO_aimag_code==11
save intermediate/admin_code.dta, replace

use `base_dataset.dta', clear
merge m:m s_Bag_ID using intermediate/admin_code.dta


* modify information of the one not matched(_merge==1)
replace NSO_aimag_code=85 if s_Bag_ID==850173
replace NSO_aimag_name="Увс" if s_Bag_ID==850173
replace NSO_aimag_name_en="Uvs" if s_Bag_ID==850173
replace NSO_soum_code=1 if s_Bag_ID==850173
replace NSO_soum_name="Улаангом сум" if s_Bag_ID==850173
replace NSO_soum_name_en="Ulaangom" if s_Bag_ID==850173
replace NSO_bag_code=73 if s_Bag_ID==850173
replace NSO_bag_name="12-р баг" if s_Bag_ID==850173
replace NSO_bag_name_en="11-r bag" if s_Bag_ID==850173


* generate a category of herder household ratio
gen s_h_households=1
replace s_h_households=2 if h_households>0.050
replace s_h_households=3 if h_households>0.100
replace s_h_households=4 if h_households>0.150

gen s_households=1
replace s_households=2 if households>300


* set distribution numbers based on herder household ratio
gen distribution=40
replace distribution=60 if s_h_households==3 & s_households==1
replace distribution=60 if s_h_households==2 & s_households==2
replace distribution=80 if s_h_households==4 & s_households==1
replace distribution=80 if s_h_households==3 & s_households==2
replace distribution=100 if s_h_households==4 & s_households==2


tabstat distribution if target_test==1, by(treatment) stat(count mean median sd min max sum) 


* ===========================				   
* Labeling
* ===========================				   
label variable population "Population"
label variable households "Number of households"
label variable h_pop "Herder population"
label variable h_rate "Herder population rate"
label variable hh_rate "Herder household rate" 
label variable horse "Horse"
label variable cattle "Cattle"
label variable camel "Camel"
label variable sheep "Sheep" 
label variable goat "Goat"



label variable sourceofelectricity "Source of electricity"
label variable solarenergy "Solar energy"
label variable windenergy "Wind energy"
label variable smallsizedgenerators "Small sized generators"
label variable centralpowersystem "Central power system"
label variable other "Other electricity"
label variable satelliteantenna "Satellite antenna"
label variable television "Television"
label variable radio "Radio"
label variable sewingmachine "Sewing machine"
label variable mobilephones "Mobilephones"
label variable handmadefelt "Handmadefelt"


label variable treatment "explanatory material"
label define treatment 0 "Control" 1 "Disability and survivors pension treatment" 2 "Trust treatment" 3 "Mobile banking treatment" 
label values  treatment treatment

drop _merge
drop _Itreatment_1 _Itreatment_2 _Itreatment_3
save intermediate/target_bags, replace
use intermediate/target_bags, replace


* =========================
* =========================
* 4. Conducting a Balancing Test
* =========================
* =========================

* variable changing order
gen  orth_intervention = 1 if treatment == 0
replace  orth_intervention = 2 if treatment == 1
replace  orth_intervention = 3 if treatment == 3
** changing order
replace  orth_intervention = 4 if treatment == 2
** changing order

label define orth_intervention 1 "Control" 2 "Disability and survivors pension" 3 "Mobile banking" 4 "Trust" , replace
label values orth_intervention orth_intervention


* Summary statistics
eststo clear
bysort orth_intervention: eststo: estpost sum population households h_pop h_rate hh_rate horse cattle camel sheep goat  if target==1


* Balancing test
orth_out population households h_pop h_rate hh_rate horse cattle camel sheep goat television mobilephones using results/balancing_test_NSO_treat.tex if target==1, by(orth_intervention) vce(robust) count se test replace stars bdec(2) title(Balancing test of the randomization (using public statisitics data)) note(Each column reports means of each variables at village unit. Standard errors are in parentheses. Except for rate variables, the unit of each variable is the number per village. The right column reports the results of joint orthogonality tests on the treatment arms with the p-value. * Significant at the 10% level. ** Significant at the 5% level. *** Significant at the 1% level. Source: the National Statistics Office of Mongolia.) latex



* Counting numbers
keep if target==1
keep orth_intervention s_Bag_ID
duplicates drop 
tab orth_intervention



